library(tidyverse)
library(WGCNA)
library(ggeasy)
library(ggpubr)

net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_net_MF/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/"
work_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/bulk_dlpfc/"
resource_dir = "/pastel/resources/20220203_snRNAseq_AMPAD/"
cell_dir = "/pastel/resources/cell_proportion_SN/"
### Expression data 
load(paste0(expression_dir, "exprdata_byregion.Rdata")) 
expr_matx = as.data.frame(exprData_MF) # Residuals of the expression
colnames(expr_matx) = gsub("(.*)_(.*)", "\\2", colnames(expr_matx))

### Modules from SE
modules_file = read.table(paste0(net_dir, "geneBycluster.txt"), header = T) 
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")

### Eigengenes and average expression 
load(paste0(net_dir, "lv3_moduleEigengenes.Rdata"))
mod_eigengene = lv3_moduleEigengenes$eigengenes
mod_average = lv3_moduleEigengenes$averageExpr

mod_average$projid = gsub("(.*)_(.*)", "\\2", rownames(mod_average)) #get the projid to match with phenotype data 
rownames(mod_average) = mod_average$projid
mod_average$projid = NULL

### Phenotype
# load("/pastel/projects/spatial_t/pseudo_bulk/phenotypes.RData") # phenotypes
# phenotype_dt = phenotypes[match(rownames(mod_average), phenotypes$projid),]
# head(phenotype_dt[, c("projid", "cogng_demog_slope")])

Module membership (MM)

For each gene, we define a “fuzzy” measure of module membership by correlating its gene expression profile with the module eigengene of a given module. Highly connected intramodular hub genes tend to have high module membership values to the respective module. If the value is close to 1 or -1, the gene is highly correlated with the other genes inside the module.

ModuleMembership = as.data.frame(WGCNA::cor(t(expr_matx),
                                            mod_average,
                                            method = "pearson",
                                            use="p")) # this function is faster than the R standand correlation one 

### Get modules >= 30 nodes to show:
to_show = paste0("AE",as.character( modules_size$module[modules_size$n_nodes >= 30])) 
mm_filt = ModuleMembership[, colnames(ModuleMembership) %in% to_show ]

# save(mod_average, ModuleMembership, mm_filt, file = paste0(work_dir, "Mod_membership_MF.Rdata"))

# createDT(ModuleMembership)

Cell counts

Counts from the SN dataset.

# Major cell types 
# cell_prop = read.delim2(paste0(resource_dir, "updated_annotations/cellprop_snRNAseq.txt"), header = T, colClasses = "character", stringsAsFactors = F, check.names = F)

# Sub cell types of SN
cell_prop = read.delim2(paste0(resource_dir, "updated_annotations/subcellprop_snRNAseq.txt"), header = T, colClasses = "character", stringsAsFactors = F, check.names = F)

# mjr_celltypes1 = names(cell_prop)[grep("Exc.", names(cell_prop))]
# mjr_celltypes2 = names(cell_prop)[grep("Inh.", names(cell_prop))]
# mjr_celltypes3 = names(cell_prop)[grep("Oli.", names(cell_prop))]
mjr_celltypes4 = names(cell_prop)[grep("Ast.", names(cell_prop))]
mjr_celltypes5 = names(cell_prop)[grep("Mic.", names(cell_prop))]
# mjr_celltypes6 = names(cell_prop)[grep("OPC.", names(cell_prop))]
# mjr_celltypes7 = names(cell_prop)[grep("End.", names(cell_prop))]

# mjr_celltypes = c(mjr_celltypes1, mjr_celltypes2, mjr_celltypes3, mjr_celltypes4, mjr_celltypes5, mjr_celltypes6, mjr_celltypes7)
glia = c(mjr_celltypes4, mjr_celltypes5)

cell_prop_f = cell_prop[, c("projid",glia) ]
cell_prop_f[,-1] <- sapply(cell_prop_f[,-1], as.numeric)

Gene significance (GS)

The absolute value of the correlation between the gene and the trait. In this case, trait is cell counts.

Shinya: “I wonder if there is any connections between the gene membership of module 3 to the Ast and Mic cells.”

MF_M3 vs…

projid_in_common = cell_prop_f$projid[which(cell_prop_f$projid %in% colnames(expr_matx))]
cell_prop_f_in_common = cell_prop_f[match(projid_in_common,cell_prop_f$projid),] 
expr_mf_m3 = expr_matx[modules_file$ensembl[modules_file$cluster_lv3 == 3], projid_in_common]
# dim(expr_mf_m3)
# dim(cell_prop_f_in_common)

GS_mf_m3 = as.data.frame(WGCNA::cor(t(expr_mf_m3), cell_prop_f_in_common[-1], use = 'pairwise.complete.obs'))

# join tables 
mm_filt_gene = mm_filt
mm_filt_gene$ensembl = rownames(mm_filt_gene)
GS_mf_m3$ensembl = rownames(GS_mf_m3)

mm_gs_merged = mm_filt_gene %>% inner_join(GS_mf_m3, by="ensembl")

Ast.1

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.1)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.1") +
  theme_classic()

Ast.2

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.2)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.2") +
  theme_classic()

Ast.3

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.3)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.3") +
  theme_classic()

Ast.4

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.4)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.4") +
  theme_classic()

Ast.5

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.5)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.5") +
  theme_classic()

Ast.6

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.6)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.6") +
  theme_classic()

Ast.7

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.7)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.7") +
  theme_classic()

Ast.8

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.8)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.8") +
  theme_classic()

Ast.9

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.9)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.9") +
  theme_classic()

Ast.10

ggplot(mm_gs_merged, aes(x=AE3, y=Ast.10)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.10") +
  theme_classic()

Mic.1

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.1)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.1") +
  theme_classic()

Mic.2

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.2)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.2") +
  theme_classic()

Mic.3

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.3)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.3") +
  theme_classic()

Mic.4

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.4)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.4") +
  theme_classic()

Mic.5

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.5)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.5") +
  theme_classic()

Mic.6

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.6)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.6") +
  theme_classic()

Mic.7

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.7)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.7") +
  theme_classic()

Mic.8

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.8)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.8") +
  theme_classic()

Mic.9

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.9)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.9") +
  theme_classic()

Mic.10

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.10)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.10") +
  theme_classic()

Mic.11

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.11)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.11") +
  theme_classic()

Mic.12

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.12)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.12") +
  theme_classic()

Mic.13

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.13)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.13") +
  theme_classic()

Mic.14

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.14)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.14") +
  theme_classic()

Mic.15

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.15)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.15") +
  theme_classic()

Mic.16

ggplot(mm_gs_merged, aes(x=AE3, y=Mic.16)) +
  geom_point() + 
  stat_smooth(method = "lm", se=F) +
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
  easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.16") +
  theme_classic()

Session info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.4.0          ggeasy_0.1.3          WGCNA_1.70-3         
##  [4] fastcluster_1.2.3     dynamicTreeCut_1.63-1 forcats_0.5.1        
##  [7] stringr_1.4.0         dplyr_1.0.8           purrr_0.3.4          
## [10] readr_2.1.2           tidyr_1.2.0           tibble_3.1.6         
## [13] ggplot2_3.4.1         tidyverse_1.3.1      
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3       ggsignif_0.6.3         ellipsis_0.3.2        
##   [4] htmlTable_2.4.0        XVector_0.34.0         base64enc_0.1-3       
##   [7] fs_1.5.2               rstudioapi_0.13        farver_2.1.0          
##  [10] bit64_4.0.5            AnnotationDbi_1.56.2   fansi_1.0.2           
##  [13] lubridate_1.8.0        xml2_1.3.3             codetools_0.2-18      
##  [16] splines_4.1.2          doParallel_1.0.17      cachem_1.0.6          
##  [19] impute_1.68.0          knitr_1.37             polynom_1.4-0         
##  [22] Formula_1.2-4          jsonlite_1.7.3         broom_0.7.12          
##  [25] cluster_2.1.2          GO.db_3.14.0           dbplyr_2.1.1          
##  [28] png_0.1-7              compiler_4.1.2         httr_1.4.2            
##  [31] backports_1.4.1        assertthat_0.2.1       Matrix_1.5-1          
##  [34] fastmap_1.1.0          cli_3.6.1              htmltools_0.5.2       
##  [37] tools_4.1.2            gtable_0.3.0           glue_1.6.1            
##  [40] GenomeInfoDbData_1.2.7 Rcpp_1.0.8             carData_3.0-5         
##  [43] Biobase_2.54.0         cellranger_1.1.0       jquerylib_0.1.4       
##  [46] vctrs_0.6.1            Biostrings_2.62.0      nlme_3.1-153          
##  [49] preprocessCore_1.56.0  iterators_1.0.14       xfun_0.29             
##  [52] rvest_1.0.2            lifecycle_1.0.3        rstatix_0.7.0         
##  [55] zlibbioc_1.40.0        scales_1.2.1           hms_1.1.1             
##  [58] parallel_4.1.2         RColorBrewer_1.1-2     yaml_2.3.5            
##  [61] memoise_2.0.1          gridExtra_2.3          sass_0.4.0            
##  [64] rpart_4.1-15           latticeExtra_0.6-29    stringi_1.7.6         
##  [67] RSQLite_2.2.10         highr_0.9              S4Vectors_0.32.3      
##  [70] foreach_1.5.2          checkmate_2.0.0        BiocGenerics_0.40.0   
##  [73] GenomeInfoDb_1.30.1    rlang_1.1.0            pkgconfig_2.0.3       
##  [76] bitops_1.0-7           matrixStats_0.61.0     evaluate_0.15         
##  [79] lattice_0.20-45        labeling_0.4.2         htmlwidgets_1.5.4     
##  [82] bit_4.0.4              tidyselect_1.1.2       magrittr_2.0.2        
##  [85] R6_2.5.1               IRanges_2.28.0         generics_0.1.2        
##  [88] Hmisc_4.6-0            DBI_1.1.2              mgcv_1.8-38           
##  [91] pillar_1.7.0           haven_2.4.3            foreign_0.8-81        
##  [94] withr_2.5.0            abind_1.4-5            survival_3.2-13       
##  [97] KEGGREST_1.34.0        RCurl_1.98-1.6         nnet_7.3-16           
## [100] car_3.0-12             modelr_0.1.8           crayon_1.5.0          
## [103] utf8_1.2.2             tzdb_0.2.0             rmarkdown_2.11        
## [106] jpeg_0.1-9             grid_4.1.2             readxl_1.3.1          
## [109] data.table_1.14.2      blob_1.2.2             reprex_2.0.1          
## [112] digest_0.6.29          stats4_4.1.2           munsell_0.5.0         
## [115] bslib_0.3.1